From c1100c28edad0c4494bba80f7b2da1f573d28d44 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Thu, 12 Nov 2009 11:43:21 +0000 Subject: [PATCH] Support physical CPU hot-add in xen hypervisor This patch adds physical CPU hot-add support to the system. a) It marks all CPUs as possible at boot if CONFIG_HOTPLUG_CPU is set. Note that this increases the size of the per_cpu area. b) When a CPU is added through the hypercall, it is marked as present and offline, and its NUMA information is set up if NUMA is supported. The CPU will then be brought online explicitly by dom0. Signed-off-by: Jiang, Yunhong --- xen/arch/x86/acpi/boot.c | 1 - xen/arch/x86/mpparse.c | 16 ++++++- xen/arch/x86/numa.c | 8 ++-- xen/arch/x86/platform_hypercall.c | 6 +++ xen/arch/x86/setup.c | 6 ++- xen/arch/x86/smpboot.c | 77 +++++++++++++++++++++++++++++++ xen/arch/x86/srat.c | 7 +-- xen/include/asm-x86/acpi.h | 1 + xen/include/asm-x86/numa.h | 6 +++ xen/include/asm-x86/smp.h | 2 + xen/include/public/platform.h | 9 ++++ 11 files changed, 126 insertions(+), 13 deletions(-) diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c index e6cc2de6f2..8e67cde539 100644 --- a/xen/arch/x86/acpi/boot.c +++ b/xen/arch/x86/acpi/boot.c @@ -81,7 +81,6 @@ u8 acpi_enable_value, acpi_disable_value; #warning ACPI uses CMPXCHG, i486 and later hardware #endif -#define MAX_MADT_ENTRIES 256 u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] = {[0 ... 
MAX_MADT_ENTRIES - 1] = 0xff }; EXPORT_SYMBOL(x86_acpiid_to_apicid); diff --git a/xen/arch/x86/mpparse.c b/xen/arch/x86/mpparse.c index 7d65c2abaf..33648562ae 100644 --- a/xen/arch/x86/mpparse.c +++ b/xen/arch/x86/mpparse.c @@ -35,7 +35,7 @@ /* Have we found an MP table */ int smp_found_config; -unsigned int __initdata maxcpus = NR_CPUS; +unsigned int __devinitdata maxcpus = NR_CPUS; /* * Various Linux-internal data structures created from the @@ -868,6 +868,20 @@ int __devinit mp_register_lapic ( return MP_processor_info(&processor); } +void mp_unregister_lapic(uint32_t apic_id, uint32_t cpu) +{ + if (!cpu || (apic_id == boot_cpu_physical_apicid)) + return; + + if (x86_cpu_to_apicid[cpu] != apic_id) + return; + + physid_clear(apic_id, phys_cpu_present_map); + + x86_cpu_to_apicid[cpu] = BAD_APICID; + cpu_clear(cpu, cpu_present_map); + } + #ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c index 167373f834..6f06ac1e62 100644 --- a/xen/arch/x86/numa.c +++ b/xen/arch/x86/numa.c @@ -42,9 +42,9 @@ cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; nodemask_t __read_mostly node_online_map = { { [0] = 1UL } }; /* Default NUMA to off for now. acpi=on required to enable it. */ -int numa_off __initdata = 1; +int numa_off __devinitdata = 1; -int acpi_numa __initdata; +int acpi_numa __devinitdata; /* * Given a shift value, try to populate memnodemap[] @@ -53,7 +53,7 @@ int acpi_numa __initdata; * 0 if memnodmap[] too small (of shift too small) * -1 if node overlap or lost ram (shift too big) */ -static int __init +static int __devinit populate_memnodemap(const struct node *nodes, int numnodes, int shift) { int i; @@ -259,7 +259,7 @@ static __init int numa_setup(char *opt) * prior to this call, and this initialization is good enough * for the fake NUMA cases. 
*/ -void __init init_cpu_to_node(void) +void __devinit init_cpu_to_node(void) { int i; for (i = 0; i < NR_CPUS; i++) { diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c index 9bde0d057f..b33cfb8392 100644 --- a/xen/arch/x86/platform_hypercall.c +++ b/xen/arch/x86/platform_hypercall.c @@ -463,6 +463,12 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) } break; + case XENPF_cpu_hotadd: + ret = cpu_add(op->u.cpu_add.apic_id, + op->u.cpu_add.acpi_id, + op->u.cpu_add.pxm); + break; + default: ret = -ENOSYS; break; diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index 1dfffca770..a8d254027e 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -246,7 +246,7 @@ static void __init init_idle_domain(void) setup_idle_pagetable(); } -static void __init srat_detect_node(int cpu) +void __devinit srat_detect_node(int cpu) { unsigned node; u32 apicid = x86_cpu_to_apicid[cpu]; @@ -484,6 +484,10 @@ void __init __start_xen(unsigned long mbi_p) smp_prepare_boot_cpu(); +#ifdef CONFIG_HOTPLUG_CPU + prefill_possible_map(); +#endif + /* We initialise the serial devices very early so we can get debugging. 
*/ ns16550.io_base = 0x3f8; ns16550.irq = 4; diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index 5d05c1e5cc..024896fa4a 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -1435,6 +1435,78 @@ void enable_nonboot_cpus(void) */ smpboot_restore_warm_reset_vector(); } + +int prefill_possible_map(void) +{ + int i; + + for (i = 0; i < NR_CPUS; i++) + cpu_set(i, cpu_possible_map); + return 0; +} + +int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm) +{ + int cpu = -1; + +#ifndef CONFIG_ACPI + return -ENOSYS; +#endif + + dprintk(XENLOG_DEBUG, "cpu_add apic_id %x acpi_id %x pxm %x\n", + apic_id, acpi_id, pxm); + + if ( acpi_id > MAX_MADT_ENTRIES || apic_id > MAX_APICS || pxm > 256 ) + return -EINVAL; + + /* Detect if the cpu has been added before */ + if ( x86_acpiid_to_apicid[acpi_id] != 0xff) + { + if (x86_acpiid_to_apicid[acpi_id] != apic_id) + return -EINVAL; + else + return -EEXIST; + } + + if ( physid_isset(apic_id, phys_cpu_present_map) ) + return -EEXIST; + + spin_lock(&cpu_add_remove_lock); + + cpu = mp_register_lapic(apic_id, 1); + + if (cpu < 0) + { + spin_unlock(&cpu_add_remove_lock); + return cpu; + } + + x86_acpiid_to_apicid[acpi_id] = apic_id; + + if ( !srat_disabled() ) + { + int node; + + node = setup_node(pxm); + if (node < 0) + { + dprintk(XENLOG_WARNING, "Setup node failed for pxm %x\n", pxm); + x86_acpiid_to_apicid[acpi_id] = 0xff; + mp_unregister_lapic(apic_id, cpu); + spin_unlock(&cpu_add_remove_lock); + return node; + } + apicid_to_node[apic_id] = node; + } + + srat_detect_node(cpu); + numa_add_cpu(cpu); + spin_unlock(&cpu_add_remove_lock); + dprintk(XENLOG_INFO, "Add CPU %x with index %x\n", apic_id, cpu); + return cpu; +} + + #else /* ... 
!CONFIG_HOTPLUG_CPU */ int __cpu_disable(void) { @@ -1446,6 +1518,11 @@ void __cpu_die(unsigned int cpu) /* We said "no" in __cpu_disable */ BUG(); } + +int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm) +{ + return -ENOSYS; +} #endif /* CONFIG_HOTPLUG_CPU */ int __devinit __cpu_up(unsigned int cpu) diff --git a/xen/arch/x86/srat.c b/xen/arch/x86/srat.c index 9deeda077c..13a035ce45 100644 --- a/xen/arch/x86/srat.c +++ b/xen/arch/x86/srat.c @@ -41,7 +41,7 @@ int pxm_to_node(int pxm) return (signed char)pxm2node[pxm]; } -static __init int setup_node(int pxm) +__devinit int setup_node(int pxm) { unsigned node = pxm2node[pxm]; if (node == 0xff) { @@ -93,11 +93,6 @@ static __init void bad_srat(void) apicid_to_node[i] = NUMA_NO_NODE; } -static __init inline int srat_disabled(void) -{ - return numa_off || acpi_numa < 0; -} - /* * A lot of BIOS fill in 10 (= no distance) everywhere. This messes * up the NUMA heuristics which wants the local node to have a smaller diff --git a/xen/include/asm-x86/acpi.h b/xen/include/asm-x86/acpi.h index 7666c2cfad..299f56266f 100644 --- a/xen/include/asm-x86/acpi.h +++ b/xen/include/asm-x86/acpi.h @@ -150,6 +150,7 @@ struct acpi_sleep_info { #endif /* CONFIG_ACPI_SLEEP */ +#define MAX_MADT_ENTRIES 256 extern u8 x86_acpiid_to_apicid[]; #define MAX_LOCAL_APIC 256 diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h index 5486404282..cabb46f935 100644 --- a/xen/include/asm-x86/numa.h +++ b/xen/include/asm-x86/numa.h @@ -30,7 +30,13 @@ extern void numa_add_cpu(int cpu); extern void numa_init_array(void); extern int numa_off; +static __devinit inline int srat_disabled(void) +{ + return numa_off || acpi_numa < 0; +} extern void numa_set_node(int cpu, int node); +extern int setup_node(int pxm); +extern void srat_detect_node(int cpu); extern void setup_node_bootmem(int nodeid, u64 start, u64 end); extern unsigned char apicid_to_node[256]; diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h index 
066612c3a3..515cc9a1e0 100644 --- a/xen/include/asm-x86/smp.h +++ b/xen/include/asm-x86/smp.h @@ -66,6 +66,8 @@ extern void cpu_exit_clear(void); extern void cpu_uninit(void); extern void disable_nonboot_cpus(void); extern void enable_nonboot_cpus(void); +int prefill_possible_map(void); +int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm); #else static inline int cpu_is_offline(int cpu) {return 0;} static inline void disable_nonboot_cpus(void) {} diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h index ff6837ae10..0fce5690d2 100644 --- a/xen/include/public/platform.h +++ b/xen/include/public/platform.h @@ -338,6 +338,14 @@ struct xenpf_cpu_ol typedef struct xenpf_cpu_ol xenpf_cpu_ol_t; DEFINE_XEN_GUEST_HANDLE(xenpf_cpu_ol_t); +#define XENPF_cpu_hotadd 58 +struct xenpf_cpu_hotadd +{ + uint32_t apic_id; + uint32_t acpi_id; + uint32_t pxm; +}; + struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ @@ -355,6 +363,7 @@ struct xen_platform_op { struct xenpf_set_processor_pminfo set_pminfo; struct xenpf_pcpuinfo pcpu_info; struct xenpf_cpu_ol cpu_ol; + struct xenpf_cpu_hotadd cpu_add; uint8_t pad[128]; } u; }; -- 2.30.2